Introduction

This lab was created by the developer of the R package prioritizr Jeffrey O. Hanson. I made slight tweaks to streamline code and reduce text.

Overview

The aim of this workshop is to help you get started with using the prioritizr R package for systematic conservation planning. It is not designed to give you a comprehensive overview and you will not become an expert after completing this workshop. Instead, we want to help you understand the core principles of conservation planning and guide you through some of the common tasks involved with developing prioritizations. In other words, we want to give you the knowledge base and confidence needed to start applying systematic conservation planning to your own work.

Data Set-up

dir_data <- here("data/prioritizr")
pu_shp   <- file.path(dir_data, "pu.shp")
pu_url   <- "https://github.com/prioritizr/massey-workshop/raw/main/data.zip"
pu_zip   <- file.path(dir_data, basename(pu_url))
vegetation_tif <- file.path(dir_data, "vegetation.tif")

dir.create(dir_data, showWarnings = F, recursive = T)
if (!file.exists(pu_shp)){
  download.file(pu_url, pu_zip)
  unzip(pu_zip, exdir = dir_data)
  dir_unzip   <- file.path(dir_data, "data")
  files_unzip <- list.files(dir_unzip, full.names = T)
  file.rename(
    files_unzip, 
    files_unzip %>% str_replace("prioritizr/data", "prioritizr"))
  unlink(c(pu_zip, dir_unzip), recursive = T)
}

Data

2.1 Data Import

# import planning unit data
pu_data <- as(read_sf(pu_shp), "Spatial")

# format columns in planning unit data
pu_data$locked_in <- as.logical(pu_data$locked_in)
pu_data$locked_out <- as.logical(pu_data$locked_out)

# import vegetation data
veg_data <- stack(vegetation_tif)

2.2 Planning Unit Data

The planning unit data contains spatial data describing the geometry for each planning unit and attribute data with information about each planning unit (e.g. cost values). Let’s investigate the pu_data object. The attribute data contains 5 columns with contain the following information:

  • id: unique identifiers for each planning unit
  • cost: acquisition cost values for each planning unit (millions of Australian dollars).
  • status: status information for each planning unit (only relevant with Marxan)
  • locked_in: logical values (i.e. TRUE/FALSE) indicating if planning units are covered by protected areas or not.
  • locked_out: logical values (i.e. TRUE/FALSE) indicating if planning units cannot be managed as a protected area because they contain are too degraded.
# print a short summary of the data
print(pu_data)
## class       : SpatialPolygonsDataFrame 
## features    : 516 
## extent      : 348703.2, 611932.4, 5167775, 5344516  (xmin, xmax, ymin, ymax)
## crs         : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs 
## variables   : 5
## names       :   id,             cost, status, locked_in, locked_out 
## min values  :  557, 3.59717531470679,      0,         0,          0 
## max values  : 1130,  47.238336402701,      2,         1,          1
# plot the planning unit data
plot(pu_data)

# plot an interactive map of the planning unit data
mapview(pu_data)
# print the structure of object
str(pu_data, max.level = 2)
## Formal class 'SpatialPolygonsDataFrame' [package "sp"] with 5 slots
##   ..@ data       :'data.frame':  516 obs. of  5 variables:
##   ..@ polygons   :List of 516
##   ..@ plotOrder  : int [1:516] 69 104 1 122 157 190 4 221 17 140 ...
##   ..@ bbox       : num [1:2, 1:2] 348703 5167775 611932 5344516
##   .. ..- attr(*, "dimnames")=List of 2
##   ..@ proj4string:Formal class 'CRS' [package "sp"] with 1 slot
# print the class of the object
class(pu_data)
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
# print the slots of the object
slotNames(pu_data)
## [1] "data"        "polygons"    "plotOrder"   "bbox"        "proj4string"
# print the coordinate reference system
print(pu_data@proj4string)
## Coordinate Reference System:
## Deprecated Proj.4 representation:
##  +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs 
## WKT2 2019 representation:
## PROJCRS["WGS 84 / UTM zone 55S",
##     BASEGEOGCRS["WGS 84",
##         DATUM["World Geodetic System 1984",
##             ELLIPSOID["WGS 84",6378137,298.257223563,
##                 LENGTHUNIT["metre",1]]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["degree",0.0174532925199433]],
##         ID["EPSG",4326]],
##     CONVERSION["UTM zone 55S",
##         METHOD["Transverse Mercator",
##             ID["EPSG",9807]],
##         PARAMETER["Latitude of natural origin",0,
##             ANGLEUNIT["Degree",0.0174532925199433],
##             ID["EPSG",8801]],
##         PARAMETER["Longitude of natural origin",147,
##             ANGLEUNIT["Degree",0.0174532925199433],
##             ID["EPSG",8802]],
##         PARAMETER["Scale factor at natural origin",0.9996,
##             SCALEUNIT["unity",1],
##             ID["EPSG",8805]],
##         PARAMETER["False easting",500000,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",8806]],
##         PARAMETER["False northing",10000000,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",8807]]],
##     CS[Cartesian,2],
##         AXIS["(E)",east,
##             ORDER[1],
##             LENGTHUNIT["metre",1]],
##         AXIS["(N)",north,
##             ORDER[2],
##             LENGTHUNIT["metre",1]],
##     ID["EPSG",32755]]
# print number of planning units (geometries) in the data
nrow(pu_data)
## [1] 516
# print the first six rows in the data
head(pu_data@data)
##    id     cost status locked_in locked_out
## 1 557 29.74225      0     FALSE      FALSE
## 2 558 29.87703      0     FALSE      FALSE
## 3 574 28.60687      0     FALSE      FALSE
## 4 575 30.83416      0     FALSE      FALSE
## 5 576 38.75511      0     FALSE      FALSE
## 6 577 38.11618      2      TRUE      FALSE
# print the first six values in the cost column of the attribute data
head(pu_data$cost)
## [1] 29.74225 29.87703 28.60687 30.83416 38.75511 38.11618
# print the highest cost value
max(pu_data$cost)
## [1] 47.23834
# print the smallest cost value
min(pu_data$cost)
## [1] 3.597175
# print average cost value
mean(pu_data$cost)
## [1] 26.87393
# plot a map of the planning unit cost data
spplot(pu_data, "cost")

# plot an interactive map of the planning unit cost data
mapview(pu_data, zcol = "cost")

Questions

  1. How many planning units are in the planning unit data? There are 516 planning units in the data.

  2. What is the highest cost value? The highest cost value is 47.2383364.

  3. Is there a spatial pattern in the planning unit cost values?

Based on the map, the eastern side of Tasmania has planning units with lower costs ranging from 0 - 15. The Western side of Tasmania has mid cost planning units and the north-central plots are the most expensive with planning unit cost values up to 50.

2.3 Vegetation Data

The vegetation data describe the spatial distribution of 32 vegetation classes in the study area. This data is in a raster format and so the data are organized using a grid comprising square grid cells that are each the same size. In our case, the raster data contains multiple layers (also called “bands”) and each layer has corresponds to a spatial grid with exactly the same area and has exactly the same dimensionality (i.e. number of rows, columns, and cells). In this dataset, there are 32 different regular spatial grids layered on top of each other – with each layer corresponding to a different vegetation class – and each of these layers contains a grid with 164 rows, 326 columns, and 53464 cells. Within each layer, each cell corresponds to a 0.967 by 1.02 km square. The values associated with each grid cell indicate the (one) presence or (zero) absence of a given vegetation class in the cell.

# print a short summary of the data
print(veg_data)
## class      : RasterStack 
## dimensions : 164, 326, 53464, 32  (nrow, ncol, ncell, nlayers)
## resolution : 967, 1020  (x, y)
## extent     : 298636.7, 613878.7, 5167756, 5335036  (xmin, xmax, ymin, ymax)
## crs        : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs 
## names      : vegetation.1, vegetation.2, vegetation.3, vegetation.4, vegetation.5, vegetation.6, vegetation.7, vegetation.8, vegetation.9, vegetation.10, vegetation.11, vegetation.12, vegetation.13, vegetation.14, vegetation.15, ... 
## min values :            0,            0,            0,            0,            0,            0,            0,            0,            0,             0,             0,             0,             0,             0,             0, ... 
## max values :            1,            1,            1,            1,            1,            1,            1,            1,            1,             1,             1,             1,             1,             1,             1, ...
# plot a map of the 20th vegetation class
plot(veg_data[[20]])

# plot an interactive map of the 20th vegetation class
mapview(veg_data[[20]])
# print number of rows in the data
nrow(veg_data)
## [1] 164
# print number of columns in the data
ncol(veg_data)
## [1] 326
# print number of cells in the data
ncell(veg_data)
## [1] 53464
# print number of layers in the data
nlayers(veg_data)
## [1] 32
# print  resolution on the x-axis
xres(veg_data)
## [1] 967
# print resolution on the y-axis
yres(veg_data)
## [1] 1020
# print spatial extent of the grid, i.e. coordinates for corners
extent(veg_data)
## class      : Extent 
## xmin       : 298636.7 
## xmax       : 613878.7 
## ymin       : 5167756 
## ymax       : 5335036
# print the coordinate reference system
print(veg_data@crs)
## Coordinate Reference System:
## Deprecated Proj.4 representation:
##  +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs 
## WKT2 2019 representation:
## PROJCRS["WGS 84 / UTM zone 55S",
##     BASEGEOGCRS["WGS 84",
##         DATUM["World Geodetic System 1984",
##             ELLIPSOID["WGS 84",6378137,298.257223563,
##                 LENGTHUNIT["metre",1]]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["degree",0.0174532925199433]],
##         ID["EPSG",4326]],
##     CONVERSION["UTM zone 55S",
##         METHOD["Transverse Mercator",
##             ID["EPSG",9807]],
##         PARAMETER["Latitude of natural origin",0,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8801]],
##         PARAMETER["Longitude of natural origin",147,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8802]],
##         PARAMETER["Scale factor at natural origin",0.9996,
##             SCALEUNIT["unity",1],
##             ID["EPSG",8805]],
##         PARAMETER["False easting",500000,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",8806]],
##         PARAMETER["False northing",10000000,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",8807]]],
##     CS[Cartesian,2],
##         AXIS["(E)",east,
##             ORDER[1],
##             LENGTHUNIT["metre",1]],
##         AXIS["(N)",north,
##             ORDER[2],
##             LENGTHUNIT["metre",1]],
##     USAGE[
##         SCOPE["unknown"],
##         AREA["World - S hemisphere - 144°E to 150°E - by country"],
##         BBOX[-80,144,0,150]],
##     ID["EPSG",32755]]
# print a summary of the first layer in the stack
print(veg_data[[1]])
## class      : RasterLayer 
## band       : 1  (of  32  bands)
## dimensions : 164, 326, 53464  (nrow, ncol, ncell)
## resolution : 967, 1020  (x, y)
## extent     : 298636.7, 613878.7, 5167756, 5335036  (xmin, xmax, ymin, ymax)
## crs        : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs 
## source     : vegetation.tif 
## names      : vegetation.1 
## values     : 0, 1  (min, max)
# print the value in the 800th cell in the first layer of the stack
print(veg_data[[1]][800])
##   
## 0
# calculate the sum of all the cell values in the first layer
cellStats(veg_data[[1]], "sum")
## [1] 17
# calculate the maximum value of all the cell values in the first layer
cellStats(veg_data[[1]], "max")
## [1] 1
# calculate the minimum value of all the cell values in the first layer
cellStats(veg_data[[1]], "min")
## [1] 0
# calculate the mean value of all the cell values in the first layer
cellStats(veg_data[[1]], "mean")
## [1] 0.00035239

Questions

  1. What part of the study area is the 13th vegetation class found in (hint: make a map)? For instance, is it in the south-eastern part of the study area?
mapview(veg_data[[13]])

The majority of the 13th vegetation class is found in the north-eastern part of the study area.

  1. What proportion of cells contain the 12th vegetation class?
num_cells <- ncell(veg_data)
twelve_cells <- cellStats(veg_data[[12]], "sum")

twelve_proportion <- twelve_cells / num_cells
twelve_percent <- round((twelve_proportion * 100), 2)

1.53% of cells contain the 12th vegetation class.

  1. Which vegetation class is the most abundant (i.e. present in the greatest number of cells)?
all_stats <- cellStats(veg_data, "sum", na.rm = TRUE)

which.max(all_stats)
## vegetation.12 
##            12

12 is the vegetation class is the most abundant.

3 Gap Analysis

This step is critical: we cannot develop plans to improve conservation of biodiversity if we don’t understand how well existing policies are currently conserving biodiversity! To achieve this, we can perform a “gap analysis.” A gap analysis involves calculating how well each of our biodiversity features (i.e. vegetation classes in this exercise) are represented (covered) by protected areas. Next, we compare current representation by protected areas of each feature (e.g. 5% of their spatial distribution covered by protected areas) to a target threshold (e.g. 20% of their spatial distribution covered by protected areas). This target threshold denotes the minimum amount (e.g. minimum proportion of spatial distribution) that we need of each feature to be represented in the protected area system.

Feature Abundance

# create prioritizr problem with only the data 
p0 <- problem(pu_data, veg_data, cost_colum = "cost")

# print empty problem 
# we can see that only the cost and feature data are defined
print(p0)

abundance_data <- feature_abundances(p0)

print(abundance_data)
## # A tibble: 32 × 3
##    feature       absolute_abundance relative_abundance
##    <chr>                      <dbl>              <dbl>
##  1 vegetation.1                16.0                  1
##  2 vegetation.2                14.3                  1
##  3 vegetation.3                10.4                  1
##  4 vegetation.4                17.8                  1
##  5 vegetation.5                13.0                  1
##  6 vegetation.6                14.3                  1
##  7 vegetation.7                20.0                  1
##  8 vegetation.8                14.0                  1
##  9 vegetation.9                18.0                  1
## 10 vegetation.10               20.0                  1
## # … with 22 more rows
# only the first 10 rows are printed because the abundance_data is a tible and not a data.frame 

print(class(abundance_data))
## [1] "tbl_df"     "tbl"        "data.frame"
print(abundance_data, n = Inf)
## # A tibble: 32 × 3
##    feature       absolute_abundance relative_abundance
##    <chr>                      <dbl>              <dbl>
##  1 vegetation.1                16.0                  1
##  2 vegetation.2                14.3                  1
##  3 vegetation.3                10.4                  1
##  4 vegetation.4                17.8                  1
##  5 vegetation.5                13.0                  1
##  6 vegetation.6                14.3                  1
##  7 vegetation.7                20.0                  1
##  8 vegetation.8                14.0                  1
##  9 vegetation.9                18.0                  1
## 10 vegetation.10               20.0                  1
## 11 vegetation.11               23.6                  1
## 12 vegetation.12              748.                   1
## 13 vegetation.13              126.                   1
## 14 vegetation.14               10.5                  1
## 15 vegetation.15               17.5                  1
## 16 vegetation.16               15.0                  1
## 17 vegetation.17              213.                   1
## 18 vegetation.18               14.3                  1
## 19 vegetation.19               17.1                  1
## 20 vegetation.20               21.4                  1
## 21 vegetation.21               18.6                  1
## 22 vegetation.22              297.                   1
## 23 vegetation.23               20.3                  1
## 24 vegetation.24              165.                   1
## 25 vegetation.25              716.                   1
## 26 vegetation.26               24.0                  1
## 27 vegetation.27               18.8                  1
## 28 vegetation.28               17.5                  1
## 29 vegetation.29               24.7                  1
## 30 vegetation.30               59.0                  1
## 31 vegetation.31               60.0                  1
## 32 vegetation.32               32                    1

The abundance_data object contains three columns. The feature column contains the name of each feature (derived from names(veg_data)), the absolute_abundance column contains the total amount of each feature in all the planning units, and the relative_abundance column contains the total amount of each feature in the planning units expressed as a proportion of the total amount in the underlying raster data. Since all the raster cells containing vegetation overlap with the planning units, all of the values in the relative_abundance column are equal to one (meaning 100%). So the relative_abundance per feature is a measure of the ‘percent presence’ of that feature across all planning units (100% or 1 in the case of all these vegetation layers, which is not interesting), whereas absolute_abundance measures the total amount of that feature when the value for all planning units is added up.

# add new column with feature abundances in km^2

abundance_data$absolute_abundance_km2 <- 
  (abundance_data$absolute_abundance * prod(res(veg_data))) %>% 
  set_units(m^2) %>% 
  set_units(km^2)

print(abundance_data)
## # A tibble: 32 × 4
##    feature       absolute_abundance relative_abundance absolute_abundance_km2
##    <chr>                      <dbl>              <dbl>                 [km^2]
##  1 vegetation.1                16.0                  1                   15.8
##  2 vegetation.2                14.3                  1                   14.1
##  3 vegetation.3                10.4                  1                   10.2
##  4 vegetation.4                17.8                  1                   17.6
##  5 vegetation.5                13.0                  1                   12.8
##  6 vegetation.6                14.3                  1                   14.1
##  7 vegetation.7                20.0                  1                   19.7
##  8 vegetation.8                14.0                  1                   13.9
##  9 vegetation.9                18.0                  1                   17.8
## 10 vegetation.10               20.0                  1                   19.7
## # … with 22 more rows
mean(abundance_data$absolute_abundance_km2)
## 86.82948 [km^2]
hist(abundance_data$absolute_abundance_km2, main = "Feature abundances")

max(abundance_data$absolute_abundance_km2)
## 737.982 [km^2]
abundance_data$feature[which.max(abundance_data$absolute_abundance_km2)]
## [1] "vegetation.12"
sum(abundance_data$absolute_abundance_km2 > set_units(100, km^2))
## [1] 6

Questions

  1. What is the median abundance of the features?

The median abundance of the features is 19.1165038783561.

  1. What is the name of the feature with the smallest abundance?

The name of the feature with the smallest abundance is vegetation.3

  1. How many features have a total abundance greater than 100 km^2?

6 features have a total abundance greater than 100km^2.

Feature Representation

After calculating the total amount of each feature in the planning units (i.e. the features’ abundances), we will now calculate the amount of each feature in the planning units that are covered by protected areas (i.e. feature representation by protected areas).

# create column in planning unit data withbinary values
# indicqting if a planning unit is covered by protected areas or not 
pu_data$pa_status <- as.numeric(pu_data$locked_in)

# calculate feature representation by protected areas 
repr_data <- eval_feature_representation_summary(p0, pu_data[, "pa_status"])

print(repr_data)
## # A tibble: 32 × 5
##    summary feature       total_amount absolute_held relative_held
##    <chr>   <chr>                <dbl>         <dbl>         <dbl>
##  1 overall vegetation.1          16.0         0            0     
##  2 overall vegetation.2          14.3         0            0     
##  3 overall vegetation.3          10.4         0            0     
##  4 overall vegetation.4          17.8         0            0     
##  5 overall vegetation.5          13.0         0            0     
##  6 overall vegetation.6          14.3         0            0     
##  7 overall vegetation.7          20.0         0            0     
##  8 overall vegetation.8          14.0         0            0     
##  9 overall vegetation.9          18.0         0.846        0.0470
## 10 overall vegetation.10         20.0         0            0     
## # … with 22 more rows

The feature column contains the name of each feature, the absolute_held column shows the total amount of each feature held in the solution (i.e. the planning units covered by protected areas), and the relative_held column shows the proportion of each feature held in the solution (i.e. the proportion of each feature’s spatial distribution held in protected areas). So the absolute_held is an amount up to but not exceeding the original absolute_abundance of that feature across all planning units (see above) based on those planning units in the solution, and the relative_held is like the percent of planning units in the solution that had this feature present. Since the absolute_held values correspond to the number of grid cells in the veg_data object with overlap with protected areas, let’s convert them to area units (i.e. km2) so we can report them.

# add new column with the areas represented in km^2 
repr_data$absolute_held_km2 <- 
  (repr_data$absolute_held * prod(res(veg_data))) %>% 
  set_units(m^2) %>% 
  set_units(km^2)

print(repr_data)
## # A tibble: 32 × 6
##    summary feature       total_amount absolute_held relative_held absolute_held_k…
##    <chr>   <chr>                <dbl>         <dbl>         <dbl>           [km^2]
##  1 overall vegetation.1          16.0         0            0                 0    
##  2 overall vegetation.2          14.3         0            0                 0    
##  3 overall vegetation.3          10.4         0            0                 0    
##  4 overall vegetation.4          17.8         0            0                 0    
##  5 overall vegetation.5          13.0         0            0                 0    
##  6 overall vegetation.6          14.3         0            0                 0    
##  7 overall vegetation.7          20.0         0            0                 0    
##  8 overall vegetation.8          14.0         0            0                 0    
##  9 overall vegetation.9          18.0         0.846        0.0470            0.834
## 10 overall vegetation.10         20.0         0            0                 0    
## # … with 22 more rows
sum(repr_data$relative_held < 0.1)
## [1] 17
plot(abundance_data$absolute_abundance ~ repr_data$relative_held)

Questions

  1. What is the avergae proportion of the features held in protected areas? The average proportion of features held in protected areas is 0, 0, 0, 0, 0, 0, 0, 0, 0.0469856, 0, 0, 0.0207843, 0.0223709, 0.2776706, 0, 0, 0.5755541, 0.7199336, 0.5084657, 0.4877022, 0.8080939, 0.5657124, 0.798157, 0.6319006, 0.3591633, 0.4027306, 0.5366816, 0.4668036, 0.2860463, 0.149548, 0.0340036, 0.03125. 24%

  2. If we set a target of 10% coverage by protected areas, how many features fail to meet this target?

17 fail to meet this target. Ask friends about this one.

  1. If we set a target of 20% coverage by protected areas, how many fail?

18 fail to meet this target.

  1. Is there a relationship between the total abundance of a feature and how well it is represented by protected areas?

There is no relationship between the total abundance and its representation in protected areas.

Spatial Prioritizations

print(pu_data)
## class       : SpatialPolygonsDataFrame 
## features    : 516 
## extent      : 348703.2, 611932.4, 5167775, 5344516  (xmin, xmax, ymin, ymax)
## crs         : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs 
## variables   : 6
## names       :   id,             cost, status, locked_in, locked_out, pa_status 
## min values  :  557, 3.59717531470679,      0,         0,          0,         0 
## max values  : 1130,  47.238336402701,      2,         1,          1,         1
# make prioritization problem 
p1_rds <- file.path(dir_data, "p1.rds")
if(!file.exists(p1_rds)){
  p1 <- problem(pu_data, veg_data, cost_column = "cost") %>% 
  add_min_set_objective() %>% 
  add_relative_targets(0.05) %>% 
  add_binary_decisions() %>% 
  add_lpsymphony_solver()

  saveRDS(p1, p1_rds)
}

p1 <- readRDS(p1_rds)

print(p1)

s1 <- solve(p1)

print(s1)
## class       : SpatialPolygonsDataFrame 
## features    : 516 
## extent      : 348703.2, 611932.4, 5167775, 5344516  (xmin, xmax, ymin, ymax)
## crs         : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs 
## variables   : 7
## names       :   id,             cost, status, locked_in, locked_out, pa_status, solution_1 
## min values  :  557, 3.59717531470679,      0,         0,          0,         0,          0 
## max values  : 1130,  47.238336402701,      2,         1,          1,         1,          1
eval_n_summary(p1, s1[, "solution_1"])
## # A tibble: 1 × 2
##   summary  cost
##   <chr>   <dbl>
## 1 overall    15
eval_cost_summary(p1, s1[, "solution_1"])
## # A tibble: 1 × 2
##   summary  cost
##   <chr>   <dbl>
## 1 overall  385.
eval_feature_representation_summary(p1, s1[, "solution_1"])
## # A tibble: 32 × 5
##    summary feature       total_amount absolute_held relative_held
##    <chr>   <chr>                <dbl>         <dbl>         <dbl>
##  1 overall vegetation.1          16.0          2.90        0.181 
##  2 overall vegetation.2          14.3          1           0.0699
##  3 overall vegetation.3          10.4          1           0.0963
##  4 overall vegetation.4          17.8          3           0.168 
##  5 overall vegetation.5          13.0          1           0.0769
##  6 overall vegetation.6          14.3          1.94        0.136 
##  7 overall vegetation.7          20.0          1.75        0.0875
##  8 overall vegetation.8          14.0          2.80        0.200 
##  9 overall vegetation.9          18.0          2.16        0.120 
## 10 overall vegetation.10         20.0          2           0.0999
## # … with 22 more rows
spplot(s1, "solution_1", col.regions = c("gray", "darkgreen"), main = "s1", colorkey = F)

Questions

  1. How many planing units were selected in the prioritization? What proportion of planning units were selected in the prioritization?

There were 15 planning units selected which was 0.0290698% of the total.

  1. Is there a pattern in the spatial distribution of the priority areas?

There is not pattern in the spatial distribution.

  1. Can you verify that all of the targets were met in the prioritization?

Yes you can verify this because all of the relative_held in solution 1 are above 5%

Adding complexity

# plot locked_in data
# TRUE = blue, FALSE = grey
spplot(pu_data, "locked_in", col.regions = c("grey", "darkblue"),
       main = "locked_in", colorkey = FALSE)

# make prioritization problem
p2_rds <- file.path(dir_data, "p2.rds")
if (!file.exists(p2_rds)){
  p2 <- problem(pu_data, veg_data, cost_column = "cost") %>%
      add_min_set_objective() %>%
      add_relative_targets(0.05) %>%
      add_locked_in_constraints("locked_in") %>%
      add_binary_decisions() %>%
      add_lpsymphony_solver()
  saveRDS(p2, p2_rds)
}
p2 <- readRDS(p2_rds)

# print problem
print(p2)

# solve problem
s2 <- solve(p2)

# plot solution
# selected = green, not selected = grey
spplot(s2, "solution_1", col.regions = c("grey80", "darkgreen"), main = "s2",
       colorkey = FALSE)

10% targets

# make prioritization problem
p3_rds <- file.path(dir_data, "p3.rds")
if (!file.exists(p3_rds)){
  p3 <- problem(pu_data, veg_data, cost_column = "cost") %>%
    add_min_set_objective() %>%
    add_relative_targets(0.1) %>%
    add_locked_in_constraints("locked_in") %>%
    add_binary_decisions() %>%
    add_lpsymphony_solver()
  saveRDS(p3, p3_rds)
}
p3 <- readRDS(p3_rds)

# print problem
print(p3)

# solve problem
s3 <- solve(p3)

# plot solution
# selected = green, not selected = grey
spplot(s3, "solution_1", col.regions = c("grey80", "darkgreen"), main = "s3",
       colorkey = FALSE)

Lock out degraded areas

# plot locked_out data
# TRUE = red, FALSE = grey
spplot(pu_data, "locked_out", col.regions = c("grey80", "darkred"),
       main = "locked_out", colorkey = FALSE)

# make prioritization problem
p4_rds <- file.path(dir_data, "p4.rds")
if (!file.exists(p4_rds)){
  p4 <- problem(pu_data, veg_data, cost_column = "cost") %>%
    add_min_set_objective() %>%
    add_relative_targets(0.1) %>%
    add_locked_in_constraints("locked_in") %>%
    add_locked_out_constraints("locked_out") %>%
    add_binary_decisions() %>%
    add_lpsymphony_solver()
  saveRDS(p4, p4_rds)
}
p4 <- readRDS(p4_rds)
# print problem
print(p4)

# solve problem
s4 <- solve(p4)

# plot solution
# selected = green, not selected = grey
spplot(s4, "solution_1", col.regions = c("grey80", "darkgreen"), main = "s4",
       colorkey = FALSE)

Questions

  1. What is the cost of the planning units selected in s2, s3, and s4? 2. How many planning units are in s2, s3, and s4?

s2 costs 6600.1 and has overall, 205 planning units, s3 costs 6669.9 and has overall, 211 planning units, and s4 costs 6711.6 and has overall, 212planning units.

  1. Do the solutions with more planning units have a greater cost? Why (or why not)?

The solutions with more planning units have higher costs because there are more planning units.

  1. Why does the first solution (s1) cost less than the second solution with protected areas locked into the solution (s2)?

The first solution costs less than the second solution because the first solution holds the top 15 planning units but they are most likely more expensive. The second solution has more constraints to it is likely more expensive.

  1. Why does the third solution (s3) cost less than the fourth solution solution with highly degraded areas locked out (s4)?

The degraded areas cost less to protect so solution 4 is more expensive with the degraded areas locked out.

Penalizing Fragmentation

# make prioritization problem
p5_rds <- file.path(dir_data, "p5.rds")
if (!file.exists(p5_rds)){
  p5 <- problem(pu_data, veg_data, cost_column = "cost") %>%
    add_min_set_objective() %>%
    add_boundary_penalties(penalty = 0.001) %>%
    add_relative_targets(0.1) %>%
    add_locked_in_constraints("locked_in") %>%
    add_locked_out_constraints("locked_out") %>%
    add_binary_decisions() %>%
    add_lpsymphony_solver()
  saveRDS(p5, p5_rds)
}
p5 <- readRDS(p5_rds)

# print problem
print(p5)

# solve problem,
# note this will take a bit longer than the previous runs
s5 <- solve(p5)

# print solution
print(s5)
## class       : SpatialPolygonsDataFrame 
## features    : 516 
## extent      : 348703.2, 611932.4, 5167775, 5344516  (xmin, xmax, ymin, ymax)
## crs         : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs 
## variables   : 7
## names       :   id,             cost, status, locked_in, locked_out, pa_status, solution_1 
## min values  :  557, 3.59717531470679,      0,         0,          0,         0,          0 
## max values  : 1130,  47.238336402701,      2,         1,          1,         1,          1
# plot solution
# selected = green, not selected = grey
spplot(s5, "solution_1", col.regions = c("grey80", "darkgreen"), main = "s5",
       colorkey = FALSE)

Questions

  1. What is the cost the fourth (s4) and fifth (s5) solutions? Why does the fifth solution (s5) cost more than the fourth (s4) solution?

The fifth solution costs more than the forth solution because they are clumped together and this clump is in an expensive part of the map so it costs more to protect all of these areas than the cheaper more spread out ones.

  1. Try setting the penalty value to 0.000000001 (i.e. 1e-9) instead of 0.001. What is the cost of the solution now? Is it different from the fourth solution (s4) (hint: try plotting the solutions to visualize them)? Is this is a useful penalty value? Why (or why not)?
# make prioritization problem
p6_rds <- file.path(dir_data, "p6.rds")
if (!file.exists(p6_rds)){
  p6 <- problem(pu_data, veg_data, cost_column = "cost") %>%
    add_min_set_objective() %>%
    add_boundary_penalties(penalty = 0.000000001) %>%
    add_relative_targets(0.1) %>%
    add_locked_in_constraints("locked_in") %>%
    add_locked_out_constraints("locked_out") %>%
    add_binary_decisions() %>%
    add_lpsymphony_solver()
  saveRDS(p6, p6_rds)
}
p6 <- readRDS(p6_rds)
# print problem
print(p6)
# solve problem,
# note this will take a bit longer than the previous runs
s6 <- solve(p6)
# print solution
print(s6)
## class       : SpatialPolygonsDataFrame 
## features    : 516 
## extent      : 348703.2, 611932.4, 5167775, 5344516  (xmin, xmax, ymin, ymax)
## crs         : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs 
## variables   : 7
## names       :   id,             cost, status, locked_in, locked_out, pa_status, solution_1 
## min values  :  557, 3.59717531470679,      0,         0,          0,         0,          0 
## max values  : 1130,  47.238336402701,      2,         1,          1,         1,          1
# plot solution
# selected = green, not selected = grey
spplot(s6, "solution_1", col.regions = c("grey80", "darkgreen"), main = "s6",
       colorkey = FALSE)

If you set the penalty too low nothing changes and there is no change in the cost of the planning units.

  1. Try setting the penalty value to 0.5. What is the cost of the solution now? Is it different from the fourth solution (s4) (hint: try plotting the solutions to visualize them)? Is this a useful penalty value? Why (or why not)?
# make prioritization problem
p7_rds <- file.path(dir_data, "p7.rds")
if (!file.exists(p7_rds)){
  p7 <- problem(pu_data, veg_data, cost_column = "cost") %>%
    add_min_set_objective() %>%
    add_boundary_penalties(penalty = 0.5) %>%
    add_relative_targets(0.1) %>%
    add_locked_in_constraints("locked_in") %>%
    add_locked_out_constraints("locked_out") %>%
    add_binary_decisions() %>%
    add_lpsymphony_solver()
  saveRDS(p7, p7_rds)
}
p7 <- readRDS(p7_rds)
# print problem
print(p7)
# solve problem,
# note this will take a bit longer than the previous runs
s7 <- solve(p7)
# print solution
print(s7)
## class       : SpatialPolygonsDataFrame 
## features    : 516 
## extent      : 348703.2, 611932.4, 5167775, 5344516  (xmin, xmax, ymin, ymax)
## crs         : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs 
## variables   : 7
## names       :   id,             cost, status, locked_in, locked_out, pa_status, solution_1 
## min values  :  557, 3.59717531470679,      0,         0,          0,         0,          0 
## max values  : 1130,  47.238336402701,      2,         1,          1,         1,          1
# plot solution
# selected = green, not selected = grey
spplot(s7, "solution_1", col.regions = c("grey80", "darkgreen"), main = "s7",
       colorkey = FALSE)

The solution cost with a penalty value of 0.5 is $420196.69. This is very different from solution 4 with significantly higher costs. This is not a useful penalty value because you get one giant grouping you get one giant clump but this extreme is not useful because the cost is crazy high.